Instruction¶

I have a typical project of predicting the NYC uber/lyft trip demand. The dataset is available from Jan2022 to March 2023. The area is already divided into different locations. and I want the predicted demand for each location every 15 mins

Problem statment¶

The goal of this project is to predict the demand for Uber/Lyft trips in different locations of NYC every 15 minutes, using a dataset spanning from January 2022 to March 2023. The dataset includes information such as the dispatching base number, pickup datetime, drop-off datetime, pickup location ID, drop-off location ID, SR_Flag, and affiliated base number

In [1]:
import pandas as pd
import glob
import tqdm
import pandas as pd
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from dateutil.relativedelta import relativedelta
import numpy as np
from pmdarima import auto_arima
In [2]:
data_list_path = glob.glob('Datasets/fhv_tripdata_2022-2023_in_csv/*.csv')

list_df = []
for path in data_list_path:
    print(path)
    # Step 1: Preprocess the Dataset
    df = pd.read_csv(path)
    list_df.append(df)
    
df =  pd.concat(list_df)

interested_features = ['pickup_datetime','PUlocationID']
df = df[interested_features]
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-09.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-02.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-04.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-07.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-01.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-06.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-08.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-03.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-11.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-12.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-02.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-03.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-01.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-05.csv
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-10.csv
In [3]:
import pandas as pd
import pmdarima as pm
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split

print('Number of Rows Before Removing NaN:', df.shape[0])
removed_nan_df = df.dropna()
print('Number of Rows After Removing NaN:', removed_nan_df.shape[0])
Number of Rows Before Removing NaN: 17712727
Number of Rows After Removing NaN: 4164902
In [4]:
location_ids = removed_nan_df['PUlocationID'].unique().tolist()

loop_count = 0
for lc_id in location_ids:
    print('Location ID:', lc_id)
    df_subset = removed_nan_df[removed_nan_df['PUlocationID'] == lc_id]
    df_subset['pickup_datetime'] = pd.to_datetime(df_subset['pickup_datetime'])
    df_subset = df_subset.sort_values('pickup_datetime')
    df_subset = df_subset.set_index('pickup_datetime')
    df_subset = df_subset['PUlocationID'].resample('1H').count()
    df_subset = df_subset.reset_index()
    # Split data into training and testing sets
    train_size = int(len(df_subset) * 0.95)
    train_data = df_subset[:train_size]
    test_data = df_subset[train_size:]

    # Perform auto ARIMA on training data
    model = pm.auto_arima(train_data['PUlocationID'], seasonal=True, trace=True)

    # Generate predictions
    forecast, conf_int = model.predict(n_periods=len(test_data), return_conf_int=True)

    # Plotting
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_data.index, y=train_data['PUlocationID'], mode='lines+markers', name='Training Data'))
    fig.add_trace(go.Scatter(x=test_data.index, y=test_data['PUlocationID'], mode='lines+markers', name='Testing Data'))
    fig.add_trace(go.Scatter(x=test_data.index, y=forecast, mode='lines+markers', name='ARIMA Forecast'))
    fig.update_layout(title=f'PickLocation ID: {lc_id}', xaxis_title='Time', yaxis_title='Number Drives')
    fig.show()
    loop_count +=1
    if loop_count >5:
        break
Location ID: 12.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_subset['pickup_datetime'] = pd.to_datetime(df_subset['pickup_datetime'])
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=-11849.933, Time=18.02 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=-11825.381, Time=5.04 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=-11844.742, Time=10.19 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=-11843.762, Time=4.49 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=-11650.357, Time=0.41 sec
 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=-11854.674, Time=35.05 sec
 ARIMA(0,0,2)(0,0,0)[0] intercept   : AIC=-11847.957, Time=7.87 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=-11846.164, Time=6.39 sec
 ARIMA(1,0,3)(0,0,0)[0] intercept   : AIC=-11844.097, Time=17.41 sec
 ARIMA(0,0,3)(0,0,0)[0] intercept   : AIC=-11847.073, Time=8.61 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=-11854.373, Time=20.90 sec
 ARIMA(2,0,3)(0,0,0)[0] intercept   : AIC=-11853.703, Time=65.54 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=2.92 sec

Best model:  ARIMA(1,0,2)(0,0,0)[0] intercept
Total fit time: 202.841 seconds
Location ID: 89.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=50398.076, Time=25.17 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=51768.990, Time=0.61 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=50564.942, Time=1.47 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=50454.990, Time=2.80 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=51766.991, Time=0.24 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=49.71 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=50447.109, Time=8.32 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=50260.815, Time=34.84 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=50442.066, Time=18.64 sec
 ARIMA(4,1,2)(0,0,0)[0] intercept   : AIC=50238.059, Time=61.10 sec
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AIC=50429.196, Time=16.75 sec
 ARIMA(5,1,2)(0,0,0)[0] intercept   : AIC=50165.209, Time=68.43 sec
 ARIMA(5,1,1)(0,0,0)[0] intercept   : AIC=50401.097, Time=19.04 sec
 ARIMA(5,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=89.99 sec
 ARIMA(4,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=69.49 sec
 ARIMA(5,1,2)(0,0,0)[0]             : AIC=50163.210, Time=8.91 sec
 ARIMA(4,1,2)(0,0,0)[0]             : AIC=50236.059, Time=5.33 sec
 ARIMA(5,1,1)(0,0,0)[0]             : AIC=50399.097, Time=3.10 sec
 ARIMA(5,1,3)(0,0,0)[0]             : AIC=inf, Time=9.21 sec
 ARIMA(4,1,1)(0,0,0)[0]             : AIC=50427.197, Time=1.84 sec
 ARIMA(4,1,3)(0,0,0)[0]             : AIC=inf, Time=7.37 sec

Best model:  ARIMA(5,1,2)(0,0,0)[0]          
Total fit time: 502.395 seconds
Location ID: 87.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=43.83 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=44697.282, Time=0.62 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=43072.517, Time=1.97 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=42507.709, Time=3.38 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=44695.282, Time=0.25 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=42469.574, Time=6.29 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=37.45 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=40.04 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=42479.063, Time=4.20 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=42724.790, Time=3.08 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=42467.574, Time=0.59 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=42505.709, Time=0.30 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=43070.517, Time=0.26 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=inf, Time=1.45 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=inf, Time=2.59 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=42477.063, Time=0.57 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=42722.790, Time=0.41 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=inf, Time=3.92 sec

Best model:  ARIMA(1,1,1)(0,0,0)[0]          
Total fit time: 151.225 seconds
Location ID: 230.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=45.03 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=54510.711, Time=0.64 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=53717.314, Time=1.45 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=53577.196, Time=3.32 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=54508.711, Time=0.25 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=20.54 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=53537.512, Time=4.17 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=33.35 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=53441.672, Time=7.54 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=49.47 sec
 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=inf, Time=28.08 sec
 ARIMA(1,1,4)(0,0,0)[0] intercept   : AIC=inf, Time=56.27 sec
 ARIMA(0,1,3)(0,0,0)[0]             : AIC=53439.672, Time=0.84 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=53535.512, Time=0.49 sec
 ARIMA(1,1,3)(0,0,0)[0]             : AIC=inf, Time=2.75 sec
 ARIMA(0,1,4)(0,0,0)[0]             : AIC=inf, Time=2.33 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=inf, Time=1.75 sec
 ARIMA(1,1,4)(0,0,0)[0]             : AIC=inf, Time=4.12 sec

Best model:  ARIMA(0,1,3)(0,0,0)[0]          
Total fit time: 262.372 seconds
Location ID: 73.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=41.41 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=39805.648, Time=0.63 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=37163.950, Time=1.76 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=35795.024, Time=4.28 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=39803.648, Time=0.25 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=18.92 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=35790.271, Time=6.44 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=35798.722, Time=6.41 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=32.70 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=51.26 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=35788.271, Time=0.84 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=35793.025, Time=0.57 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=35796.722, Time=0.76 sec
 ARIMA(0,1,3)(0,0,0)[0]             : AIC=inf, Time=1.83 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=inf, Time=1.57 sec
 ARIMA(1,1,3)(0,0,0)[0]             : AIC=inf, Time=3.69 sec

Best model:  ARIMA(0,1,2)(0,0,0)[0]          
Total fit time: 173.335 seconds
Location ID: 93.0
Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=17506.196, Time=23.59 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=22029.528, Time=3.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=20310.752, Time=2.13 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=18319.407, Time=8.53 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=22027.528, Time=0.26 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=17518.186, Time=34.55 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=17514.342, Time=48.60 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=17505.625, Time=35.38 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=17509.453, Time=42.82 sec
 ARIMA(4,1,2)(0,0,0)[0] intercept   : AIC=17507.371, Time=64.82 sec
 ARIMA(3,1,3)(0,0,0)[0] intercept   : AIC=17535.807, Time=47.20 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=17506.317, Time=56.05 sec
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AIC=17509.439, Time=55.24 sec
 ARIMA(4,1,3)(0,0,0)[0] intercept   : AIC=17519.849, Time=63.61 sec
 ARIMA(3,1,2)(0,0,0)[0]             : AIC=17503.622, Time=1.53 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=17502.485, Time=2.20 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=17516.180, Time=1.20 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=17512.341, Time=1.21 sec
 ARIMA(2,1,3)(0,0,0)[0]             : AIC=17503.699, Time=4.91 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=17534.014, Time=0.84 sec
 ARIMA(1,1,3)(0,0,0)[0]             : AIC=17507.498, Time=3.46 sec
 ARIMA(3,1,1)(0,0,0)[0]             : AIC=17507.444, Time=1.83 sec
 ARIMA(3,1,3)(0,0,0)[0]             : AIC=17511.641, Time=6.55 sec

Best model:  ARIMA(2,1,2)(0,0,0)[0]          
Total fit time: 509.552 seconds
In [5]:
df_subset
Out[5]:
pickup_datetime PUlocationID
0 2022-01-01 04:00:00 1
1 2022-01-01 05:00:00 0
2 2022-01-01 06:00:00 0
3 2022-01-01 07:00:00 0
4 2022-01-01 08:00:00 0
... ... ...
10911 2023-03-31 19:00:00 1
10912 2023-03-31 20:00:00 0
10913 2023-03-31 21:00:00 0
10914 2023-03-31 22:00:00 0
10915 2023-03-31 23:00:00 1

10916 rows × 2 columns

In [6]:
df[df['PUlocationID']==12.0]
Out[6]:
pickup_datetime PUlocationID
12 2022-09-01 00:22:02 12.0
33097 2022-09-01 16:26:58 12.0
105815 2022-09-03 16:21:51 12.0
133578 2022-09-04 15:17:07 12.0
138562 2022-09-04 19:42:16 12.0
... ... ...
642865 2022-10-18 14:55:31 12.0
826714 2022-10-23 01:36:25 12.0
841406 2022-10-23 13:25:39 12.0
1020739 2022-10-27 16:48:35 12.0
1030442 2022-10-27 23:40:38 12.0

200 rows × 2 columns

In [7]:
df_subset.values
Out[7]:
array([[Timestamp('2022-01-01 04:00:00'), 1],
       [Timestamp('2022-01-01 05:00:00'), 0],
       [Timestamp('2022-01-01 06:00:00'), 0],
       ...,
       [Timestamp('2023-03-31 21:00:00'), 0],
       [Timestamp('2023-03-31 22:00:00'), 0],
       [Timestamp('2023-03-31 23:00:00'), 1]], dtype=object)
In [8]:
df_subset
Out[8]:
pickup_datetime PUlocationID
0 2022-01-01 04:00:00 1
1 2022-01-01 05:00:00 0
2 2022-01-01 06:00:00 0
3 2022-01-01 07:00:00 0
4 2022-01-01 08:00:00 0
... ... ...
10911 2023-03-31 19:00:00 1
10912 2023-03-31 20:00:00 0
10913 2023-03-31 21:00:00 0
10914 2023-03-31 22:00:00 0
10915 2023-03-31 23:00:00 1

10916 rows × 2 columns

In [9]:
df = df_subset

df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
df = df.set_index('pickup_datetime')

df['pickups_per_hour'] = df['PUlocationID'].resample('3H').count()
df
Out[9]:
PUlocationID pickups_per_hour
pickup_datetime
2022-01-01 04:00:00 1 NaN
2022-01-01 05:00:00 0 NaN
2022-01-01 06:00:00 0 3.0
2022-01-01 07:00:00 0 NaN
2022-01-01 08:00:00 0 NaN
... ... ...
2023-03-31 19:00:00 1 NaN
2023-03-31 20:00:00 0 NaN
2023-03-31 21:00:00 0 3.0
2023-03-31 22:00:00 0 NaN
2023-03-31 23:00:00 1 NaN

10916 rows × 2 columns

In [10]:
df
Out[10]:
PUlocationID pickups_per_hour
pickup_datetime
2022-01-01 04:00:00 1 NaN
2022-01-01 05:00:00 0 NaN
2022-01-01 06:00:00 0 3.0
2022-01-01 07:00:00 0 NaN
2022-01-01 08:00:00 0 NaN
... ... ...
2023-03-31 19:00:00 1 NaN
2023-03-31 20:00:00 0 NaN
2023-03-31 21:00:00 0 3.0
2023-03-31 22:00:00 0 NaN
2023-03-31 23:00:00 1 NaN

10916 rows × 2 columns

In [11]:
import pandas as pd
import matplotlib.pyplot as plt

# Step 1: Preprocess the Dataset
df = pd.read_csv('Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-01.csv')
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
df['dropOff_datetime'] = pd.to_datetime(df['dropOff_datetime'])
df.set_index('pickup_datetime', inplace=True)

# Step 2: Resample the Dataset
demand_15_mints = df[['PUlocationID', 'DOlocationID']].resample('15T').size()
demand_30_mints = df[['PUlocationID', 'DOlocationID']].resample('30T').size()
demand_60_mints = df[['PUlocationID', 'DOlocationID']].resample('1h').size()

# Step 3: Predict the Demand (using your preferred model)

# Step 4: Visualize the Demand
demand_15_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
In [12]:
# Step 4: Visualize the Demand
demand_30_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
In [13]:
# Step 4: Visualize the Demand
demand_60_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
In [14]:
demand_30_mints
Out[14]:
pickup_datetime
2022-01-01 00:00:00    418
2022-01-01 00:30:00    488
2022-01-01 01:00:00    435
2022-01-01 01:30:00    417
2022-01-01 02:00:00    341
                      ... 
2022-01-31 21:30:00    518
2022-01-31 22:00:00    507
2022-01-31 22:30:00    456
2022-01-31 23:00:00    439
2022-01-31 23:30:00    336
Freq: 30T, Length: 1488, dtype: int64